package com.bmft.util.jsoup;


import com.bmft.pojo.GoodContent;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Utility for scraping product listings from the JD.com search result page with Jsoup.
 *
 * @author Jarvan
 * @version 1.0
 * @create 2020/11/5 13:57
 */
public class CrawlerUtil {
    /** Search endpoint that renders the result list as static HTML. */
    private static final String SEARCH_URL = "https://search.jd.com/Search";

    /**
     * Raw {@code Cookie} request-header value captured from a browser session.
     * NOTE(review): these are hard-coded session cookies and will presumably go stale — confirm
     * whether the request still works without them or move them to configuration.
     */
    private static final String SESSION_COOKIES = "__jdu=681769560; shshshfpa=80c7520c-804b-dcc4-2a23-d651877cd978-1600915241; shshshfpb=cgI9IHwfwGaiZJ6H0DUVb4A%3D%3D; __jdv=76161171|cn.bing.com|-|referral|-|1604556218599; __jda=122270672.681769560.1583156408.1601355480.1604556219.6; __jdc=122270672; areaId=4; ipLoc-djd=4-126-0-0; 3AB9D23F7A4B3C9B=NSOK2C4KZDWGVTVYJMWGLZGUD5RHBACLPPC57KGAROREWQZEWLPF3FZ5R5JDS5GN5XVXD43BOKQ4MFOMGFKYURUMXA; rkv=1.0; arp_scroll_position=0; wlfstk_smdl=snxtlw4sdx3u6qquhjn45ywa4x19a7h2; qrsc=3; shshshfp=37cdcdb358a638f1f29b4087c6789cbb; __jdb=122270672.15.681769560|6.1604556219; shshshsID=72aecc8a0a5b3c45d9557c89517534d6_13_1604557988802";

    /**
     * Fetches the JD.com search result page for a keyword and extracts the listed products.
     *
     * <p>Jsoup only retrieves and parses the static HTML of the response; content that the page
     * loads afterwards through AJAX requests is not captured.
     *
     * @param key the search keyword sent as the {@code keyword} query parameter
     * @return the products found in the {@code J_goodsList} element, in page order; an empty list
     *         when the page does not contain the expected product list markup
     * @throws Exception if the HTTP request fails
     */
    public static List<GoodContent> getGoodsContent(String key) throws Exception {
        Document document = Jsoup.connect(SEARCH_URL)
                .data("keyword", key)
                // The site expects "utf-8" here (see the enc value in the Referer below).
                .data("enc", "utf-8")
                .data("pvid", "fd5600eacb8745d3a58aa0c7c56f31a2")
                .header("Accept", "text/html")
                // Do not advertise "br": Jsoup only decodes gzip/deflate responses, so a
                // brotli-compressed body would be parsed as garbage.
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6")
                .header("Connection", "keep-alive")
                // Sent as a raw header: cookie(name, value) would create ONE cookie literally
                // named "Cookie" and mangle the name of the first real cookie.
                .header("Cookie", SESSION_COOKIES)
                .header("DNT", "1")
                .header("Host", "search.jd.com")
                .header("Referer", "https://search.jd.com/Search?keyword=%E4%B9%A6%E7%B1%8D&enc=utf-8&wq=&pvid=d018fc812bcf4b08b5f0617c60b013a3")
                .header("Sec-Fetch-Dest", "document")
                .header("Sec-Fetch-Mode", "navigate")
                .header("Sec-Fetch-Site", "same-origin")
                .header("Sec-Fetch-User", "?1")
                .header("Upgrade-Insecure-Requests", "1")
                .header("User-Agent", "Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Mobile Safari/537.36 Edg/86.0.622.58")
                .get();
        return parseGoods(document);
    }

    /** Extracts one {@link GoodContent} per {@code <li>} of the first list inside {@code J_goodsList}. */
    private static List<GoodContent> parseGoods(Document document) {
        List<GoodContent> goodContents = new ArrayList<>();

        // getElementById returns null when the id is absent (e.g. a login or anti-bot page was
        // served instead of search results); treat that as "no results" rather than crashing.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            return goodContents;
        }
        Elements uls = goodsList.getElementsByTag("ul");
        if (uls.isEmpty()) {
            return goodContents;
        }

        Elements lis = uls.get(0).getElementsByTag("li");
        for (Element li : lis) {
            // The image URL is read from the data-lazy-img attribute of the item's first <img>.
            String img = toImageUrl(li.getElementsByTag("img").eq(0).attr("data-lazy-img"));
            String price = li.getElementsByClass("p-price").text();
            String name = li.getElementsByClass("p-name").text();
            goodContents.add(new GoodContent(name, price, img));
        }
        return goodContents;
    }

    /**
     * Turns the lazy-image attribute value into a usable URL: an empty value stays empty (instead
     * of becoming the broken URL {@code "https:"}), a value that already carries an http(s) scheme
     * is kept as is, and anything else gets the {@code https:} prefix.
     */
    private static String toImageUrl(String lazySrc) {
        if (lazySrc.isEmpty()) {
            return "";
        }
        if (lazySrc.startsWith("http://") || lazySrc.startsWith("https://")) {
            return lazySrc;
        }
        return "https:" + lazySrc;
    }
}
